package com.dut.bbs.data;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.dut.bbs.util.URLConstant;
import com.dut.bbs.web.HtmlSource;

/**
 * Scrapes a BBS category page into a flat list of title/link entries.
 */
public class ParseCategory {
	/** Row-text prefix that identifies a directory row (anything else is treated as a board row). */
	private static final String DIRECTORY_PREFIX = "目录";

	/**
	 * Downloads the page at {@code url} and extracts one entry per child of the
	 * first element carrying the CSS class {@code list}.
	 *
	 * <p>Each entry is a map with two keys:
	 * <ul>
	 * <li>{@code "title"} - the full text of the row. According to the original
	 * author's note it has the form {@code kind|name|} (kind being "board" or
	 * "directory"). To extract the name use {@code title.split("\\|")}; note that
	 * {@code split("|")} does NOT work because {@code |} is a regex
	 * metacharacter and would split between every character.</li>
	 * <li>{@code "url"} - {@code URLConstant.URL_BASE} followed by the
	 * {@code href} attribute of the row's link element.</li>
	 * </ul>
	 *
	 * <p>Rows that do not contain the expected link element are skipped, and a
	 * page without any {@code list} element yields an empty list, instead of
	 * failing with an {@link IndexOutOfBoundsException}.
	 *
	 * @param url address of the category page to fetch
	 * @return the parsed entries in document order; never {@code null}, possibly empty
	 * @throws Exception if fetching or parsing the page fails
	 */
	public static List<Map<String, String>> parse(String url) throws Exception {
		List<Map<String, String>> listResult = new ArrayList<Map<String, String>>();
		String html = HtmlSource.getHtml_data(url);
		Document doc = Jsoup.parse(html);
		Elements containers = doc.getElementsByClass("list");
		if (containers.isEmpty()) {
			// Unexpected layout (e.g. an error page): nothing to extract.
			return listResult;
		}
		for (Element row : containers.get(0).children()) {
			String title = row.text();
			// Directory rows carry their link in the second child element, all
			// other rows in the first (presumably a leading label/icon element
			// precedes the link in directory rows - verify against the page markup).
			int linkIndex = title.startsWith(DIRECTORY_PREFIX) ? 1 : 0;
			Elements rowChildren = row.children();
			if (rowChildren.size() <= linkIndex) {
				// Malformed row without the expected link element; skip it
				// rather than aborting the whole parse.
				continue;
			}
			Map<String, String> entry = new HashMap<String, String>();
			entry.put("title", title);
			entry.put("url", URLConstant.URL_BASE + rowChildren.get(linkIndex).attr("href"));
			listResult.add(entry);
		}
		return listResult;
	}
}
